# Files and directory paths
# Project root used as the working directory -- change path accordingly.
setwd("D:/HP_Win10_OneDrive/Study/OVGU/University/Summer-2021/DSR/Github/DataScienceR")
# Extracted-features CSV; the path is relative, so it is resolved against
# the working directory set above when the file is read.
csvLocation <- "dataset_working/feature_extraction/feature_extract_srinath.csv"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(ggplot2)
library(syuzhet)
#install.packages("plotly")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#install.packages("treemapify")
library(treemapify)
# Load the extracted features from the CSV at `csvLocation` into a data
# frame, then print a compact column-by-column overview of it.
features_Data <- read.csv(csvLocation)
features_Data %>%
  glimpse()
## Rows: 29,800
## Columns: 18
## $ Author_Id <chr> "06ct0t68y1acizh9eow3g5rhancrppr8", "06ct0t68y1acizh9eow3~
## $ Tweet_Id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17~
## $ Tweet_Text <chr> "courteney cox recreates classic friends scene real life ~
## $ Target <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Syuzhet <dbl> 0.50, 0.80, 3.15, 0.40, 1.15, 0.75, 0.00, 0.50, 0.20, 0.0~
## $ anger <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ~
## $ anticipation <int> 0, 0, 0, 0, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, ~
## $ disgust <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ~
## $ fear <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, ~
## $ joy <int> 0, 0, 2, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 3, 0, 1, 0, ~
## $ sadness <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, ~
## $ surprise <int> 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, ~
## $ trust <int> 1, 1, 1, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 0, ~
## $ positive <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, ~
## $ negative <int> 2, 0, 2, 1, 4, 1, 0, 0, 1, 0, 1, 1, 1, 0, 3, 3, 1, 1, 2, ~
## $ Bing <int> 0, 0, 3, 0, -1, 1, 0, 1, -1, 0, 0, 1, 1, -1, 2, 1, -2, 1,~
## $ Afinn <int> 0, 0, 8, 0, 0, 3, 0, 3, 2, 3, 0, 3, 3, 2, 6, 0, -2, 2, 0,~
## $ Nrc <int> 2, 0, 2, 1, 3, 0, 0, 0, 1, 0, 1, 1, 1, -1, 3, 3, 0, 0, 2,~
#Start of sentiments visualizations
# Plot the mean score of each sentiment method (Syuzhet, Bing, Afinn, Nrc)
# over consecutive, non-overlapping batches of 100 tweets.
sentiments_df <- features_Data %>%
  select(Syuzhet, Bing, Afinn, Nrc)

# Number of rows averaged into one plotted point.
sentiment_batch_size <- 100

# Only complete batches are used; trailing rows that do not fill a whole
# batch are ignored.
complete_batches <- nrow(sentiments_df) %/% sentiment_batch_size
complete_rows <- seq_len(complete_batches * sentiment_batch_size)

# Mean of `scores` per batch: reshaping into a matrix with one batch per
# column lets colMeans() do the averaging in a single vectorised call
# instead of accumulating row by row and growing a data frame.
mean_per_batch <- function(scores) {
  colMeans(matrix(scores[complete_rows], nrow = sentiment_batch_size))
}

sentiment_types_df <- data.frame(
  syuzhet = mean_per_batch(sentiments_df$Syuzhet),
  bing = mean_per_batch(sentiments_df$Bing),
  afinn = mean_per_batch(sentiments_df$Afinn),
  nrc = mean_per_batch(sentiments_df$Nrc)
)

# One line per sentiment method. No x is supplied, so each point is placed
# at its batch index. "jitter" is not a valid plotly scatter mode, so the
# first trace now uses "lines" like the others.
plot_ly(sentiment_types_df, y = ~syuzhet, type = "scatter", mode = "lines",
        name = "syuzhet") %>%
  add_trace(y = ~bing, mode = "lines", name = "bing") %>%
  add_trace(y = ~afinn, mode = "lines", name = "afinn") %>%
  add_trace(y = ~nrc, mode = "lines", name = "nrc") %>%
  layout(title = "Different type of sentiments for Author Tweets",
         yaxis = list(title = "Score"),
         xaxis = list(title = "Number of tweets"))
#End of sentiments visualizations
#Start of Emotions Visualizations
# Share of each of the eight emotion columns across all tweets, drawn as a
# pie chart.
emotions_df <- features_Data %>%
  select(anger, anticipation, disgust, fear, joy, sadness, surprise, trust)

# Total count per emotion (named vector: one entry per emotion column).
emotions_df_column_count <- colSums(emotions_df)

# One row per emotion with its total and its percentage of all emotion counts.
emotions_df_column_count_df <- data.frame(
  count = emotions_df_column_count,
  Different_Emotions = names(emotions_df_column_count)
) %>%
  mutate(percent = count / sum(count) * 100)

##Start of Pie chart
# A single stacked bar mapped to polar coordinates yields the pie; each
# slice is labelled in place with the emotion name and its percentage.
ggplot(emotions_df_column_count_df,
       aes(x = "",
           y = percent,
           fill = Different_Emotions)) +
  geom_bar(width = 1,
           stat = "identity",
           color = "black") +
  geom_text(aes(label = paste0(Different_Emotions, "\n", round(percent, 2))),
            position = position_stack(vjust = 0.5),
            color = "black") +
  coord_polar("y",
              start = 0,
              direction = -1) +
  theme_void() +
  # "none" is the documented value for hiding the legend; the previous
  # "FALSE" string is not a valid legend position.
  theme(legend.position = "none") +
  labs(title = "Tweets Emotion Analysis")

##End of Pie chart
#End of Emotions Visualizations
#Start of Positive Visualizations
# Relative share of the `positive` and `negative` columns across all
# tweets, drawn as a bar chart.
positive_df <- features_Data %>%
  select(positive, negative)

# Total per column (named vector with entries "positive" and "negative").
positive_df_column_count <- colSums(positive_df)

# One row per sentiment with its total and its percentage of both totals.
positive_df_column_count_df <- data.frame(
  count = positive_df_column_count,
  Sentiment = names(positive_df_column_count)
) %>%
  mutate(percent = count / sum(count) * 100)

# Explicit ggplot() call instead of the deprecated qplot(); `weight` makes
# each bar's height equal to its percentage.
ggplot(positive_df_column_count_df,
       aes(x = Sentiment, weight = percent, fill = Sentiment)) +
  geom_bar() +
  xlab("Sentiment") +
  ylab("Percentage") +
  ggtitle("Tweets Sentiment Analysis")

#End of Positive Visualizations
#Start of Tweet Analysis Based On Author ID
authorId <- 99

# Bar chart of the emotion distribution for a single author's tweets.
#
# Authors are addressed by position: author k is taken to own rows
# ((k - 1) * tweets_per_author + 1) to (k * tweets_per_author) of
# `features_Data`.
# NOTE(review): this assumes every author contributes exactly
# `tweets_per_author` consecutive rows -- confirm against the dataset.
#
# Arguments:
#   author_Id          1-based position of the author (single positive number).
#   tweets_per_author  rows belonging to each author (default 100).
# Returns:
#   A ggplot object with one bar per emotion, its height being that
#   emotion's percentage of the author's total emotion counts.
getAuthorBasedTweets <- function(author_Id, tweets_per_author = 100) {
  stopifnot(
    is.numeric(author_Id), length(author_Id) == 1, author_Id >= 1,
    is.numeric(tweets_per_author), length(tweets_per_author) == 1,
    tweets_per_author >= 1
  )

  # The first row is one past the previous author's last row. The earlier
  # formula (author_Id * 100 - 100) also pulled in the previous author's
  # final tweet, giving 101 rows instead of 100.
  start <- (author_Id - 1) * tweets_per_author + 1
  end <- author_Id * tweets_per_author

  author_emotions_df <- features_Data %>%
    slice(start:end) %>%
    select(anger, anticipation, disgust, fear, joy, sadness, surprise, trust)

  # Total per emotion for this author, then each emotion's percentage share.
  author_emotions_df_column_count <- colSums(author_emotions_df)
  author_emotions_df_column_count_df <- data.frame(
    count = author_emotions_df_column_count,
    Different_Emotions = names(author_emotions_df_column_count)
  ) %>%
    mutate(percent = count / sum(count) * 100)

  # Explicit ggplot() call instead of the deprecated qplot(); `weight`
  # makes each bar's height equal to its percentage.
  ggplot(author_emotions_df_column_count_df,
         aes(x = Different_Emotions, weight = percent,
             fill = Different_Emotions)) +
    geom_bar() +
    xlab("Emotions") +
    ylab("Percentage") +
    ggtitle("Author Emotion Analysis")
}
getAuthorBasedTweets(authorId)

#End of Tweet Analysis Based On Author ID